﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_top_ind_firms_agg_sum.sas 5 &
*/

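/*
Author: Adarsh Kumar
Objective: For every industry-firm, calculate the number of industries the firm serves and
its within-firm HHI.
Also identify each firm's percentile and rank within its industry by employment, # estab and # MSA1983.
Then find mean # industries per firm and HHI, for the top 1% and top 10% of firms and for all firms in the industry
(the output holds year-industry sums together with firm counts, from which the means follow).
*/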

/* Directory that receives the exported results (note the trailing slash). */
%let dir_out = /projects/hsieh_project/proj_201809/data/;

/* Library reference to the project data directory. */
libname hr "/projects/hsieh_project/proj_201809/data/";

/*---------------------------------------------------------*/
/* Load Raw Data: Loading 5-yearly LBD Data w/o czone, msacz */
/*---------------------------------------------------------*/

/* Bring in the loader macro and read the 5-yearly extract.
   NOTE(review): the rest of this program reads WORK.lbd, presumably created by m_read - confirm. */
%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;
%m_read(
  param_dev=1,
  param_lyear=1977 1982 1987 1992 1997 2002 2007 2013,
  param_czone=0,
  param_msa1983=1,
  param_msacz=0,
  param_drop=1
);

/* Percentile helper macro called inside ind_firm_perc further down. */
%include "/projects/hsieh_project/code_0_general/m_perc_by_var.sas" /source2;

/*
Total employment of each firm in each year (emp_agg_f).
It is merged onto the industry-firm data later and serves as the denominator of the
within-firm industry shares that make up the HHI.
*/

/* Sort straight from lbd into the work copy; a separate DATA step copy is not needed. */
proc sort data=lbd out=dt_firm_emp;
  by year firmnum;
run;

/* One row per year-firm: emp_agg_f = sum of worker over all rows of the firm. */
proc means data=dt_firm_emp noprint;
  by year firmnum;
  output out=dt_firm_emp(drop=_type_ _freq_) sum(worker)=emp_agg_f;
run;

/*
Collapse to the year-ch_ind-firm level. The resulting cells are the building blocks for the
number of industries per firm (n_ind) and the within-firm HHI.
*/

/* Sort straight from lbd into the work copy; a separate DATA step copy is not needed. */
proc sort data=lbd out=dt_ind_firm;
  by year ch_ind firmnum;
run;

/* One row per year-industry-firm with summed employment (worker) and summed est. */
proc means data=dt_ind_firm noprint;
  by year ch_ind firmnum;
  output out=dt_ind_firm(drop=_type_ _freq_) sum(worker est)=emp_agg_ind_f est_agg_ind_f;
run;

/*
Attach firm-level employment (emp_agg_f) to every industry-firm row and derive the
per-row ingredients of the firm-level statistics.
*/

proc sort data=dt_firm_emp;
  by year firmnum;
run;

proc sort data=dt_ind_firm;
  by year firmnum;
run;

data dt_ind_firm;
  merge dt_ind_firm dt_firm_emp;
  by year firmnum;

  /* Share of the firm's employment that sits in this industry. */
  share_ind_f = emp_agg_ind_f / emp_agg_f;

  /* Squared share: summing it over a firm's industries gives the within-firm HHI. */
  hhi_ind_f = share_ind_f**2;

  /* Constant 1: summing it over a firm's rows counts the industries it serves. */
  nind_ind_f = 1;
run;

/*
Firm-level collapse of dt_ind_firm:
  hhi_f   = sum of squared industry shares (within-firm HHI)
  nind_f  = number of ch_ind rows of the firm
  share_f = sum of the industry shares
*/

proc sort data=dt_ind_firm;
  by year firmnum;
run;

proc means data=dt_ind_firm noprint;
  by year firmnum;
  var hhi_ind_f nind_ind_f share_ind_f;
  output out=dt_firm(drop=_type_ _freq_) sum=hhi_f nind_f share_f;
run;

/*
Copy the firm-level results (hhi_f, nind_f, share_f in dt_firm) onto every
industry-firm row of dt_ind_firm.
*/
proc sort data=dt_firm;
  by year firmnum;
run;

proc sort data=dt_ind_firm;
  by year firmnum;
run;

/* dt_firm has one row per year-firm, so its values repeat across the industries of the firm. */
data dt_ind_firm;
  merge dt_ind_firm dt_firm;
  by year firmnum;
run;

/*
dt_ind_firm now contains, for each industry-firm, that firm's within-firm HHI (hhi_f) and the
number of ch_ind served by the firm (nind_f).
Next, the percentile of each firm within its industry is needed under different size
definitions: employment, establishments and MSA1983.

Macro ind_firm_perc
  dt_out : label for the run. It is only written to the log. The results of every call are
           accumulated in WORK.dt_out (merged by year ch_ind), which is the data set
           exported at the end of the program.
  ivar   : variable in lbd that defines firm size. The value worker uses the lbd rows as
           they are; any other variable is first reduced to one row per distinct
           year-ch_ind-firmnum-ivar combination, each flagged with the value 1.
  ivaro  : name of the size variable handed to m_perc_by_var (var_in and var_out) and the
           name component of the output variables (for example hhi_f_[ivaro]_1).
Output per call: dti_out with year-ch_ind sums of hhi_f, nind_f and n_firm over all firms and
over the firms within the top 1 and top 10 percent, merged into WORK.dt_out.
NOTE(review): a firm is treated as "top p percent" when [ivaro]_perc is at most p/100; this
assumes m_perc_by_var returns a 0-1 percentile in which small values mean large firms - confirm.
NOTE(review): WORK.dt_out left over from an earlier run in the same session would be merged
into the new results; the program assumes a fresh WORK library.
*/

%macro ind_firm_perc(dt_out = , ivar = , ivaro = );

%put Name of Output Data &dt_out.;
%put Variable to find top firms &ivar.;
%put Prefix of var-name with percentile &ivaro;

%if "&ivar." = "worker" %then %do;
  %put Employment used to define top firms;
  %put Set data to LBD;
  /* Work on a copy so that the helper macro never touches lbd itself. */
  data dt_var;
    set lbd;
  run;
%end;
%else %do;
  %put Mkt variable used to define top firm;
  %put Keeping first obs of Year Ind Firm Geo variable;
  /* Sort directly into dt_var instead of copying lbd first. */
  proc sort data=lbd out=dt_var;
    by year ch_ind firmnum &ivar.;
  run;
  /* Keep one row per distinct value of the size variable within the firm-industry cell. */
  data dt_var;
    set dt_var;
    by year ch_ind firmnum &ivar.;
    if first.&ivar.;
    &ivaro.=1;
  run;
%end;

%m_perc_by_var(ds_in=dt_var, ds_out=dt_perc, var_in=&ivaro., var_by=%bquote(year ch_ind), var_unit=firmnum, var_out=&ivaro., var_runif = runif);

proc sort data=dt_ind_firm;
  by year ch_ind firmnum;
run;

proc sort data=dt_perc;
  by year ch_ind firmnum;
run;

/* Industry-firm rows with the percentile attached; n_firm counts firms when summed. */
data dt_ind_firm_perc;
  merge dt_ind_firm dt_perc;
  by year ch_ind firmnum;
  n_firm=1;
run;

/*
Build, for each cutoff in l_perc, copies of hhi_f / nind_f / n_firm that are zero outside the
top group, so that a plain SUM by year ch_ind yields totals for all firms and for top firms.
*/

/* Declared local (after the helper call) so the loop state cannot overwrite or be
   overwritten by same-named macro variables in an outer scope. */
%local l_perc l_ind_var i_list i_perc;

%let l_perc = 1 10;
%let l_ind_var = hhi_f nind_f n_firm;

%let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;

  data dt_ind_firm_perc;
    set dt_ind_firm_perc;
    hhi_f_&ivaro._&i_perc.=0;
    nind_f_&ivaro._&i_perc.=0;
    n_firm_&ivaro._&i_perc.=0;

    /* SAS orders a missing value below every number, so a bare "<=" test would count
       firms without a percentile as top firms; they are excluded explicitly. */
    if not missing(&ivaro._perc) and &ivaro._perc <= %sysevalf(&i_perc. / 100) then
      do;
        hhi_f_&ivaro._&i_perc. = hhi_f;
        nind_f_&ivaro._&i_perc. = nind_f;
        n_firm_&ivaro._&i_perc. = 1;
      end;
  run;

  %let l_ind_var = &l_ind_var. hhi_f_&ivaro._&i_perc. nind_f_&ivaro._&i_perc. n_firm_&ivaro._&i_perc.;
  %let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry level
*/

proc sort data=dt_ind_firm_perc;
  by year ch_ind;
run;

proc means data=dt_ind_firm_perc noprint;
  by year ch_ind;
  output out=dti_out(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
run;

/* First call creates WORK.dt_out; later calls add their columns by year ch_ind. */
%if %sysfunc(exist(dt_out)) %then %do;
  data dt_out;
    merge dt_out dti_out;
    by year ch_ind;
  run;
%end;
%else %do;
  data dt_out;
    set dti_out;
  run;
%end;

%mend;

/*
Run the macro once per firm-size definition: employment (worker), number of
establishments (distinct lbdid) and number of MSA1983 areas (distinct msa1983).
Each call adds its columns to WORK.dt_out.
*/

%ind_firm_perc(dt_out=dt_perc_emp,     ivar=worker,  ivaro=worker);
%ind_firm_perc(dt_out=dt_perc_est,     ivar=lbdid,   ivaro=n_est);
%ind_firm_perc(dt_out=dt_perc_msa1983, ivar=msa1983, ivaro=n_msa1983);


/* Write the accumulated year-industry results (WORK.dt_out) to the output directory;
   the file type is taken from the .dta extension and an existing file is replaced. */
proc export
  data=dt_out
  outfile="&dir_out./top_indfirm_hhi_nind.dta"
  replace;
run;



